# Print the project-relative path that here() resolves for the data file
here("nhanes_class_dataset.rda")
## [1] "/cloud/project/nhanes_class_dataset.rda"
# Restore the objects stored in the .rda file into the workspace
# (the plots below expect an object called `nhanes` -- presumably it comes from this file)
load(file = here("nhanes_class_dataset.rda"))
# Minimal scatter plot: RIDAGEYR on the x-axis, log10 of LBXBPB on the y-axis
ggplot(data = nhanes,
       mapping = aes(x = RIDAGEYR, y = log10(LBXBPB))) +
  geom_point()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Same scatter plot with human-readable axis titles.
# The y title spells out the log10 transform and the units, with balanced parentheses.
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB))) +
  geom_point() +
  labs(x = "Age in years",
       y = "log10(blood Pb in ug/dL)")
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Restyle the points: smaller, mostly transparent, drawn with point shape 2.
# These are constants (set outside aes()), so they apply to every point.
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB))) +
  geom_point(size = 0.3,
             alpha = 0.2,
             shape = 2) +
  labs(x = "Age in years",
       y = "log10(blood Pb in ug/dL)") # closing parenthesis was missing
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Map point colour to the `sex` column and give the legend a readable title
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB),
           color = sex)) +
  geom_point(size = 0.4,
             alpha = 0.2,
             shape = 2) +
  labs(x = "Age in years",
       y = "log10(blood Pb in ug/dL)", # closing parenthesis was missing
       color = "Sex")
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Swap the default discrete colours for the viridis discrete palette
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB),
           color = sex)) +
  geom_point(size = 0.4,
             alpha = 0.2,
             shape = 2) +
  labs(x = "Age in years",
       y = "log10(blood Pb in ug/dL)", # closing parenthesis was missing
       color = "Sex") +
  scale_color_viridis_d()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Alternative palette: ColorBrewer "Spectral" applied to the colour aesthetic
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB),
           color = sex)) +
  geom_point(size = 0.4,
             alpha = 0.2,
             shape = 2) +
  labs(x = "Age in years",
       y = "log10(blood Pb in ug/dL)", # closing parenthesis was missing
       color = "Sex") +
  scale_color_brewer(palette = "Spectral")
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Apply one of ggplot2's built-in complete themes (theme_classic)
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB),
           color = sex)) +
  geom_point(size = 0.4,
             alpha = 0.2,
             shape = 2) +
  labs(x = "Age in years",
       y = "log10(blood Pb in ug/dL)", # closing parenthesis was missing
       color = "Sex") +
  theme_classic()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Override individual theme elements by hand instead of using a preset theme
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB),
           color = sex)) +
  geom_point(size = 0.4,
             alpha = 0.2,
             shape = 2) +
  labs(x = "Age in years",
       y = "log10(blood Pb in ug/dL)", # closing parenthesis was missing
       color = "Sex") +
  theme(axis.text = element_text(color = "black", size = 10), # color and size of axis tick-label text
        axis.title = element_text(size = 12),                 # text size of axis titles
        legend.title = element_text(size = 12),               # text size of legend title
        legend.text = element_text(size = 10))                # text size of legend labels
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 2370 rows containing missing values (geom_point).
# Exercise: Make a scatter plot of cotinine (LBXCOT) on the x-axis and cadmium (LBXBCD) on the y-axis. Be sure to include axis labels with units. Experiment with changing the color, size, and transparency of the points.
# Bar chart counting the rows in each level of `sex`
ggplot(data = nhanes,
       mapping = aes(x = sex)) +
  geom_bar()
# Colour both the bar interior (fill) and the bar outline (color) by `sex`
ggplot(data = nhanes,
       mapping = aes(x = sex, fill = sex, color = sex)) +
  geom_bar()
# Swap the x and y axes so the bars run horizontally
ggplot(data = nhanes,
       mapping = aes(x = sex, fill = sex, color = sex)) +
  geom_bar() +
  coord_flip()
# Vertical bars again, now with a plot title and axis titles
ggplot(data = nhanes,
       mapping = aes(x = sex, fill = sex, color = sex)) +
  geom_bar() +
  labs(
    title = "Barplot by sex",
    x = "Sex",
    y = "Number of Participants"
  )
# Choose the fill colours explicitly (two hex codes) rather than using the default palette
ggplot(data = nhanes,
       mapping = aes(x = sex, fill = sex)) +
  geom_bar() +
  labs(
    title = "Barplot by sex",
    x = "Sex",
    y = "Number of Participants"
  ) +
  scale_fill_manual(values = c("#999999", "#E69F00"))
# Stacked bars: one bar per sex, segments filled by `education`
ggplot(data = nhanes,
       mapping = aes(x = sex, fill = education)) +
  geom_bar() +
  labs(
    title = "Barplot by sex and education",
    x = "Sex",
    y = "Number of Participants"
  )
# Histogram of LBXRBCSI using the default binning
ggplot(data = nhanes,
       mapping = aes(x = LBXRBCSI)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1726 rows containing non-finite values (stat_bin).
# Set the bin width explicitly and label the x-axis.
# labs() needs the aesthetic name (x = ...); an unnamed string is not
# attached to any axis, so the label would never be shown.
ggplot(nhanes,
       aes(x = LBXRBCSI)) +
  geom_histogram(binwidth = 0.25) +
  labs(x = "Red Blood Cell Count (million cells/uL)")
## Warning: Removed 1726 rows containing non-finite values (stat_bin).
# Histograms of LBXIRN drawn on top of each other, one per sex
ggplot(data = nhanes,
       mapping = aes(x = LBXIRN, fill = sex)) + # one fill colour per sex
  geom_histogram(position = "identity", # overlay the groups; the default would stack them
                 binwidth = 10)
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 3332 rows containing non-finite values (stat_bin).
# Greyscale version of the overlaid histograms, slightly see-through
ggplot(data = nhanes,
       mapping = aes(x = LBXIRN, fill = sex)) +
  geom_histogram(alpha = 0.8, # partial transparency so the overlapped group stays visible
                 position = "identity",
                 binwidth = 10) +
  scale_fill_grey(end = 0.75,
                  start = 0)
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 3332 rows containing non-finite values (stat_bin).
# Kernel density curve of LBXRBCSI drawn as a thick blue line
ggplot(data = nhanes,
       mapping = aes(x = LBXRBCSI)) +
  geom_density(color = "dodgerblue",
               size = 2) +
  labs(x = "Red blood cell count (million cells/uL)")
## Warning: Removed 1726 rows containing non-finite values (stat_density).
# Histogram with a density curve on top.
# y is mapped to the computed density (not the count) so both layers share
# one scale. after_stat(density) replaces the deprecated `..density..`.
ggplot(nhanes,
       aes(x = LBXRBCSI, y = after_stat(density))) +
  geom_histogram(fill = "lightgreen",
                 color = "seagreen",
                 binwidth = 0.25) +
  geom_density()
## Warning: Removed 1726 rows containing non-finite values (stat_bin).
## Warning: Removed 1726 rows containing non-finite values (stat_density).
# Scatter plot with a fitted straight line (lm) layered over the points
ggplot(data = nhanes,
       mapping = aes(x = RIDAGEYR, y = log10(LBXBPB))) +
  geom_point() +
  geom_smooth(method = "lm")
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2370 rows containing non-finite values (stat_smooth).
## Warning: Removed 2370 rows containing missing values (geom_point).
# Scatter plot with geom_smooth()'s automatically chosen smoother
ggplot(data = nhanes,
       mapping = aes(x = RIDAGEYR, y = log10(LBXBPB))) +
  geom_point() +
  geom_smooth()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2370 rows containing non-finite values (stat_smooth).
## Warning: Removed 2370 rows containing missing values (geom_point).
# Scatter plot with separate colours, point shapes and linear fits per sex.
# `shape = sex` is set once in the plot-level aes() and inherited by
# geom_point(), so it is not repeated inside the layer.
ggplot(nhanes,
       aes(x = RIDAGEYR,
           y = log10(LBXBPB),
           shape = sex)) +
  geom_point(aes(color = sex),
             size = 1,
             alpha = 0.3) +
  geom_smooth(method = lm,
              aes(color = sex),
              size = 1.5) +
  labs(title = "Log(blood lead) by Age and Sex",
       x = "Age (years)",
       y = "log10(blood Pb in ug/dL)") # name the base-10 transform and the units actually plotted
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2370 rows containing non-finite values (stat_smooth).
## Warning: Removed 2370 rows containing missing values (geom_point).
# Single notched boxplot of LBXIRN with custom fill and outline colours.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
ggplot(nhanes,
       aes(x = LBXIRN)) +
  geom_boxplot(fill = "#990000",
               color = "#3366FF",
               notch = TRUE,
               notchwidth = .3)
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 3332 rows containing non-finite values (stat_boxplot).
# One boxplot of LBXIRN per level of `age_groups`
ggplot(data = nhanes,
       mapping = aes(x = age_groups, y = LBXIRN)) +
  geom_boxplot()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 3332 rows containing non-finite values (stat_boxplot).
# Boxplots of LBXIRN per age group, split and filled by sex.
# LBXIRN is plotted on its original scale (no log transform), so the
# y-axis title must not claim a log.
ggplot(nhanes,
       aes(x = age_groups,
           y = LBXIRN)) +
  geom_boxplot(aes(fill = sex)) +
  labs(title = "Boxplot of Iron Levels",
       x = "Age",
       y = "Serum iron (ug/dL)")
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Warning: Removed 3332 rows containing non-finite values (stat_boxplot).
# Violin plot of natural-log LBXBPB for each age group
ggplot(data = nhanes,
       mapping = aes(x = age_groups, y = log(LBXBPB))) +
  geom_violin()
## Warning: Removed 2370 rows containing non-finite values (stat_ydensity).
# Overlay the individual observations as jittered points on the violins
ggplot(data = nhanes,
       mapping = aes(x = age_groups, y = log(LBXBPB))) +
  geom_violin() +
  geom_jitter(alpha = 0.25,
              position = position_jitter(0.2),
              shape = 16) # circle
## Warning: Removed 2370 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2370 rows containing missing values (geom_point).
# Fill each violin by age group using the ColorBrewer "Blues" palette, and add titles
ggplot(data = nhanes,
       mapping = aes(x = age_groups, y = log(LBXBPB), fill = age_groups)) +
  geom_violin() +
  geom_jitter(alpha = 0.25,
              position = position_jitter(0.2),
              shape = 16) + # circle
  scale_fill_brewer(palette = "Blues") +
  labs(
    title = "Violin plots by age group",
    x = "Age Group (years)",
    y = "Log(Blood Lead) (ug/dL)"
  )
## Warning: Removed 2370 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2370 rows containing missing values (geom_point).
# One histogram panel per sex, stacked vertically in a single column
ggplot(data = nhanes,
       mapping = aes(x = LBXIRN, fill = sex)) +
  geom_histogram() +
  facet_wrap(facets = vars(sex), ncol = 1)
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3332 rows containing non-finite values (stat_bin).
# Store earlier plots in named objects so they can be combined and saved later
# viol_plot: violins of natural-log LBXBPB by age group with jittered points
viol_plot <- ggplot(data = nhanes,
                    mapping = aes(x = age_groups,
                                  y = log(LBXBPB),
                                  fill = age_groups)) +
  geom_violin() +
  geom_jitter(alpha = 0.25,
              position = position_jitter(0.2),
              shape = 16) + # circle
  scale_fill_brewer(palette = "Blues") +
  labs(
    title = "Violin plots by age group",
    x = "Age Group (years)",
    y = "log(blood lead) (ug/dL)"
  )
# scatter_plot: log10 blood lead against age, with colour, point shape and
# linear fit split by sex. `shape = sex` is inherited from the plot-level
# aes(), so geom_point() only adds the colour mapping.
scatter_plot <- ggplot(nhanes,
                       aes(x = RIDAGEYR,
                           y = log10(LBXBPB),
                           shape = sex)) +
  geom_point(aes(color = sex),
             size = 1,
             alpha = 0.5) +
  geom_smooth(method = lm,
              aes(color = sex)) +
  labs(title = "Log(blood lead) by age and sex",
       x = "Age (years)",
       y = "log10(blood Pb in ug/dL)") # name the base-10 transform and the units actually plotted
# bar_plot: participant counts per sex with hand-picked fill colours
bar_plot <- ggplot(data = nhanes,
                   mapping = aes(x = sex, fill = sex)) +
  geom_bar() +
  labs(
    title = "Barplot by sex",
    x = "Sex",
    y = "Number of Participants"
  ) +
  scale_fill_manual(values = c("#999999", "#E69F00"))
# facet_plot: histogram of LBXIRN in separate, vertically stacked panels per sex
facet_plot <- ggplot(data = nhanes,
                     mapping = aes(x = LBXIRN, fill = sex)) +
  geom_histogram() +
  facet_wrap(facets = vars(sex), ncol = 1) +
  labs(
    title = "Histogram of iron by sex",
    x = "Serum Iron in ug/dL"
  )
# Lay the four stored plots out on a 2 x 2 grid and display the result
ggarrange(bar_plot, viol_plot, scatter_plot, facet_plot,
          labels = c("A", "B", "C", "D"), # panel labels A-D
          nrow = 2,
          ncol = 2)
## Warning: Removed 2370 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2370 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2370 rows containing non-finite values (stat_smooth).
## Warning: Removed 2370 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3332 rows containing non-finite values (stat_bin).
# Keep the same 2 x 2 arrangement in an object so it can be written to disk
compiled_plots <- ggarrange(bar_plot, viol_plot, scatter_plot, facet_plot,
                            labels = c("A", "B", "C", "D"),
                            nrow = 2,
                            ncol = 2)
## Warning: Removed 2370 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2370 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2370 rows containing non-finite values (stat_smooth).
## Warning: Removed 2370 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3332 rows containing non-finite values (stat_bin).
# Tip: export every figure twice -- a pdf for high-quality viewing and a png for slides
# Write the combined figure to a pdf in the project root
ggsave(
  filename = here("compiled_nhanes_plots.pdf"),
  plot = compiled_plots,
  height = 9,
  width = 14
)
# Write the same figure as a png (14 x 9 inches at 300 dpi) for presentations
ggsave(
  filename = here("compiled_nhanes_plots.png"),
  plot = compiled_plots,
  height = 9,
  width = 14,
  units = "in",
  dpi = 300
)